import requests
from bs4 import BeautifulSoup
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter

# Target: any page about "data mining" or "artificial intelligence".
url = "https://baike.baidu.com/item/数据挖掘/105243"  # example: Baidu Baike "data mining" entry

# Fetch the page. A browser-like User-Agent is required: Baidu Baike
# rejects the default python-requests UA.
headers = {"User-Agent": "Mozilla/5.0"}
resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()  # fail fast on 4xx/5xx instead of tokenizing an error page
# Detect the real encoding from content; the Content-Type header on
# Chinese pages is often missing or wrong.
resp.encoding = resp.apparent_encoding
soup = BeautifulSoup(resp.text, "html.parser")
# Remove <script>/<style> elements first — otherwise their JS/CSS source
# text ends up in get_text() and pollutes the word frequencies.
for tag in soup(["script", "style"]):
    tag.decompose()
text = soup.get_text()

# Tokenize with jieba and filter stop words. (The original code promised
# stop-word filtering in its comment but only dropped single-character
# tokens, so whitespace runs, numbers, and common function words leaked
# into the frequency table.)
stopwords = {
    "我们", "他们", "一个", "可以", "没有", "这个", "以及", "通过",
    "进行", "对于", "因为", "所以", "但是", "如果", "其中", "使用",
}
words = [
    w
    for w in jieba.lcut(text)
    if len(w) > 1          # drop single characters and most punctuation
    and not w.isspace()    # drop whitespace runs left over from page layout
    and not w.isdigit()    # drop bare numbers (years, IDs, etc.)
    and w not in stopwords
]

# Keep the 200 most frequent terms for the cloud.
freq = Counter(words).most_common(200)

# Draw the word cloud.
# font_path must point to a CJK-capable font (msyh.ttc = Microsoft YaHei
# on Windows); without one, Chinese words render as empty boxes.
wc = WordCloud(font_path="msyh.ttc", width=800, height=600, background_color="white")
wc.generate_from_frequencies(dict(freq))

# Matplotlib's default font cannot render CJK either — configure a
# fallback so the Chinese figure title displays correctly.
plt.rcParams["font.sans-serif"] = ["Microsoft YaHei", "SimHei"]
plt.rcParams["axes.unicode_minus"] = False  # keep '-' rendering correctly with a CJK font

plt.figure(figsize=(10, 8))
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.title("数据挖掘词云图", fontsize=16)
plt.show()
